{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Initialization of ES"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from elasticsearch import Elasticsearch\n",
    "\n",
    "# Client for the Elasticsearch node reachable at host.docker.internal\n",
    "client = Elasticsearch(hosts=['host.docker.internal'])\n",
    "\n",
    "# Comma-separated names of the daily syslog indices passed to the searches\n",
    "indice = \",\".join([\n",
    "    \"syslog-2021-02-24\",\n",
    "    \"syslog-2021-02-25\",\n",
    "    \"syslog-2021-02-26\",\n",
    "])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "1. Query All "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Match every document in the target indices\n",
    "query = {\n",
    "    \"query\": {\n",
    "        \"match_all\": {}\n",
    "    }\n",
    "}\n",
    "\n",
    "# First page: opens a server-side scroll context and returns up to 10000 hits\n",
    "res = client.search(index=indice, body=query, scroll='100m', size=10000)\n",
    "\n",
    "print(\"Got %d Hits:\" % res['hits']['total']['value'])\n",
    "\n",
    "sid = res['_scroll_id']\n",
    "scroll_size = len(res['hits']['hits'])\n",
    "\n",
    "try:\n",
    "    # Keep pulling pages until one comes back empty\n",
    "    while scroll_size > 0:\n",
    "        print(scroll_size)\n",
    "        data = client.scroll(scroll_id=sid, scroll='2m')\n",
    "        # Update the scroll ID\n",
    "        sid = data['_scroll_id']\n",
    "        # Get the number of results that returned in the last scroll\n",
    "        scroll_size = len(data['hits']['hits'])\n",
    "finally:\n",
    "    # Free the scroll context now rather than leaving it open until its keep-alive expires\n",
    "    client.clear_scroll(scroll_id=sid)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "2. Match Query"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# `match` query against the hostname field\n",
    "query = {\"query\": {\"match\": {\"hostname\": \"for.org\"}}}\n",
    "\n",
    "res = client.search(index=indice, body=query)\n",
    "\n",
    "# Report the total, then one line per returned document\n",
    "hits = res['hits']\n",
    "print(\"Got %d Hits:\" % hits['total']['value'])\n",
    "for doc in hits['hits']:\n",
    "    source = doc[\"_source\"]\n",
    "    print(\"%(timestamp)s %(raw)s\" % source)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "3. Multi Match"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# `multi_match` query: the same text is searched in both listed fields\n",
    "search_fields = [\"hostname\", \"application\"]\n",
    "query = {\n",
    "    \"query\": {\n",
    "        \"multi_match\": {\n",
    "            \"query\": \"up.com ahmadajmi\",\n",
    "            \"fields\": search_fields\n",
    "        }\n",
    "    }\n",
    "}\n",
    "\n",
    "res = client.search(index=indice, body=query)\n",
    "\n",
    "# Report the total, then one line per returned document\n",
    "hits = res['hits']\n",
    "print(\"Got %d Hits:\" % hits['total']['value'])\n",
    "for doc in hits['hits']:\n",
    "    print(\"%(timestamp)s %(raw)s\" % doc[\"_source\"])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "4. Query String Query"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# `query_string` query: the expression is written in query-string syntax\n",
    "query_text = \"(for.org) AND (pretty breath) \"\n",
    "query = {\"query\": {\"query_string\": {\"query\": query_text}}}\n",
    "\n",
    "res = client.search(index=indice, body=query)\n",
    "\n",
    "# Report the total, then one line per returned document\n",
    "total_hits = res['hits']['total']['value']\n",
    "print(\"Got %d Hits:\" % total_hits)\n",
    "for doc in res['hits']['hits']:\n",
    "    print(\"%(timestamp)s %(raw)s\" % doc[\"_source\"])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "5. Term Query"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# `term` query on the message field\n",
    "query = {\n",
    "    \"query\": {\n",
    "        \"term\": {\n",
    "            \"message\": \"pretty\"\n",
    "        }\n",
    "    }\n",
    "}\n",
    "\n",
    "res = client.search(index=indice, body=query)\n",
    "\n",
    "# Report the total, then one line per returned document\n",
    "hits = res['hits']\n",
    "print(\"Got %d Hits:\" % hits['total']['value'])\n",
    "for doc in hits['hits']:\n",
    "    print(\"%(timestamp)s %(raw)s\" % doc[\"_source\"])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "6. Range Query"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# `range` query: documents whose version is greater than or equal to 2\n",
    "version_bounds = {\"gte\": 2}\n",
    "query = {\"query\": {\"range\": {\"version\": version_bounds}}}\n",
    "\n",
    "res = client.search(index=indice, body=query)\n",
    "\n",
    "# Report the total, then one line per returned document\n",
    "total_hits = res['hits']['total']['value']\n",
    "print(\"Got %d Hits:\" % total_hits)\n",
    "for doc in res['hits']['hits']:\n",
    "    print(\"%(timestamp)s %(raw)s\" % doc[\"_source\"])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "7. Exists Query"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# `exists` query on the application field\n",
    "query = {\"query\": {\"exists\": {\"field\": \"application\"}}}\n",
    "\n",
    "res = client.search(index=indice, body=query)\n",
    "\n",
    "# Report the total, then one line per returned document\n",
    "hits = res['hits']\n",
    "print(\"Got %d Hits:\" % hits['total']['value'])\n",
    "for doc in hits['hits']:\n",
    "    source = doc[\"_source\"]\n",
    "    print(\"%(timestamp)s %(raw)s\" % source)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "8. Regex Query\n",
    "https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-regexp-query.html"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Got 10000 Hits:\n",
      "2021-02-24T23:44:56.200Z <29>1 2021-02-24T23:44:56.200Z up.org shaneIxD 2747 ID774 - There's a breach in the warp core, captain\n",
      "2021-02-24T23:44:56.250Z <43>1 2021-02-24T23:44:56.250Z up.org devankoshal 3940 ID420 - Pretty pretty pretty good\n",
      "2021-02-24T23:45:55.395Z <49>2 2021-02-24T23:45:55.395Z up.de benefritz 8523 ID509 - You're not gonna believe what just happened\n",
      "2021-02-24T23:45:55.435Z <90>2 2021-02-24T23:45:55.435Z up.us shaneIxD 7480 ID833 - We're gonna need a bigger boat\n",
      "2021-02-24T23:45:55.695Z <122>1 2021-02-24T23:45:55.695Z up.com Karimmove 2258 ID67 - Take a breath, let it go, walk away\n",
      "2021-02-24T23:45:55.705Z <162>1 2021-02-24T23:45:55.705Z up.com shaneIxD 9784 ID281 - Pretty pretty pretty good\n",
      "2021-02-24T23:45:55.715Z <59>2 2021-02-24T23:45:55.715Z up.us devankoshal 5168 ID985 - Maybe we just shouldn't use computers\n",
      "2021-02-24T23:45:55.825Z <135>1 2021-02-24T23:45:55.825Z up.us meln1ks 3570 ID610 - Pretty pretty pretty good\n",
      "2021-02-24T23:45:55.906Z <104>2 2021-02-24T23:45:55.906Z up.org shaneIxD 590 ID177 - #hugops to everyone who has to deal with this\n",
      "2021-02-24T23:45:55.915Z <76>2 2021-02-24T23:45:55.915Z up.de ahmadajmi 9870 ID427 - There's a breach in the warp core, captain\n"
     ]
    }
   ],
   "source": [
    "# `regexp` query options applied to the hostname field\n",
    "hostname_pattern = {\n",
    "    \"value\": \"up.*\",\n",
    "    \"flags\": \"ALL\",\n",
    "    \"max_determinized_states\": 10000,\n",
    "    \"rewrite\": \"constant_score\",\n",
    "}\n",
    "query = {\"query\": {\"regexp\": {\"hostname\": hostname_pattern}}}\n",
    "\n",
    "res = client.search(index=indice, body=query)\n",
    "\n",
    "# NOTE(review): the printed total may be a lower bound if the server caps hit\n",
    "# counting (Elasticsearch defaults to 10,000) - confirm whether track_total_hits is needed\n",
    "hits = res['hits']\n",
    "print(\"Got %d Hits:\" % hits['total']['value'])\n",
    "for doc in hits['hits']:\n",
    "    print(\"%(timestamp)s %(raw)s\" % doc[\"_source\"])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "9. Compound Query https://www.elastic.co/guide/en/elasticsearch/reference/current/compound-queries.html"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# `bool` query combining a `must` term clause with a `should` term clause;\n",
    "# minimum_should_match is set to 1\n",
    "must_clause = {\"term\": {\"hostname\": \"random.net\"}}\n",
    "should_clause = {\"term\": {\"application\": \"ahmadajmi\"}}\n",
    "query = {\n",
    "    \"query\": {\n",
    "        \"bool\": {\n",
    "            \"must\": must_clause,\n",
    "            \"should\": should_clause,\n",
    "            \"minimum_should_match\": 1,\n",
    "            \"boost\": 1.0,\n",
    "        }\n",
    "    }\n",
    "}\n",
    "\n",
    "res = client.search(index=indice, body=query)\n",
    "\n",
    "# Report the total, then one line per returned document\n",
    "total_hits = res['hits']['total']['value']\n",
    "print(\"Got %d Hits:\" % total_hits)\n",
    "for doc in res['hits']['hits']:\n",
    "    print(\"%(timestamp)s %(raw)s\" % doc[\"_source\"])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "10. Count aggregation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# `value_count` aggregation over the version field, named version_count\n",
    "query = {\"aggs\": {\"version_count\": {\"value_count\": {\"field\": \"version\"}}}}\n",
    "\n",
    "res = client.search(index=indice, body=query)\n",
    "\n",
    "total_hits = res['hits']['total']['value']\n",
    "print(\"Got %d Hits:\" % total_hits)\n",
    "\n",
    "# Last expression of the cell: shown as the cell output\n",
    "res['aggregations']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "11. Cardinality aggregation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# `cardinality` aggregation over the priority field\n",
    "query = {\n",
    "  \"aggs\": {\n",
    "    \"my-agg-name\": {\n",
    "      \"cardinality\": {\n",
    "        \"field\": \"priority\"\n",
    "      }\n",
    "    }\n",
    "  }\n",
    "}\n",
    "\n",
    "# size=0: only the hit total and the aggregation are read below, so no documents\n",
    "# are fetched; no scroll context is opened because nothing pages through results\n",
    "res = client.search(index=indice, body=query, size=0)\n",
    "\n",
    "print(\"Got %d Hits:\" % res['hits']['total']['value'])\n",
    "\n",
    "print(res['aggregations'])"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
